////////////////////////////////////////
// 				Settings			  //
////////////////////////////////////////

// Stata settings:
capture log close
clear all
set more off
version 15

// Directories:
global path "C:\Stata"	//directory where all files must be in
cd "${path}"

// look of graphs, load packages
set matsize 1000, perm
graph set window fontface "Times New Roman"
ssc install estout
ssc install egenmore

use "${path}\fulldataset_study1.dta", clear

//////////////
// Analyses //
//////////////
xtset user_id order //declare as panel dataset

//Standardize for easy comparison of coefficient size 
foreach i of varlist inv return_avg sd skew value_VS value_ST value_CPT corr_VS corr_CPT corr_ST return risky path_jump-path_rel_spread {
	qui sum `i'
	local mu=r(mean)
	local sd=r(sd)
	qui replace `i' = (`i'-`mu') / `sd'
}

// Regression analysis: Table 3 
xtreg inv corr_VS, robust cluster(user_id) fe //Naked model
est store A1
xtreg inv corr_VS return_avg sd skew path*, robust cluster(user_id) fe //All statistical features as controls
est store A2
xtreg inv corr_VS return_avg sd skew path* corr_CPT corr_ST, robust cluster(user_id) fe //All statistical features plus CPT and ST as controls
est store A3
qui sum inv
local ll = round(r(min),0.000001)
local ul = round(r(max),0.000001)
xttobit inv corr_VS return_avg sd skew path* corr_CPT corr_ST, ll(`ll') ul(`ul') //Tobit instead of OLS
est store A4
xtreg inv value_VS return_avg sd skew path* value_CPT value_ST, robust cluster(user_id) fe //Value function instead of correlations
est store A5
esttab A1 A2 A3 A4 A5 using "${path}\Table3.tex", se label starlevels(* 0.1 ** 0.05 *** 0.01) replace r2 sfmt(%9.4f)


//////////////////////
//Discussion 		//
//////////////////////

// Table 5: Predict invested amounts with beliefs as controls
xtreg inv corr_VS return risky, robust cluster(user_id) fe 
est store A1
xtreg inv corr_VS return risky return_avg sd skew path* corr_CPT corr_ST, robust cluster(user_id) fe //All statistical features plus CPT and ST as controls
est store A2
xtreg inv value_VS return risky return_avg sd skew path* corr_CPT corr_ST, robust cluster(user_id) fe //All statistical features plus CPT and ST as controls
est store A3
esttab A1 A2 A3 using "${path}\Table5.tex", se label starlevels(* 0.1 ** 0.05 *** 0.01) replace r2 sfmt(%9.4f)

//////////////////////////////////////////////////////////////////////
////	WARNING: THE FOLLOWING ANALYSES TAKE QUITE SOME TIME	//////
//////////////////////////////////////////////////////////////////////

//Bottom-Up and Top-down decision weights
//Explain Correlational measure (part of attractivenes attributed to decision weights) by statistical features
xtreg corr_VS return_avg sd skew, robust cluster(user_id) fe //VS, R² 0.28%
xtreg corr_CPT return_avg sd skew, robust cluster(user_id) fe //CPT, R² 87.98%
xtreg corr_ST return_avg sd skew, robust cluster(user_id) fe //ST, R² 66.71&

pwcorr corr_VS corr_CPT corr_ST, sig //Correlation between decision weights

save "${path}\fulldataset_study1_update.dta", replace

//////////////////////
//Linear combination//
//////////////////////

//Load dataset with all weights (VS, CPT, ST)
clear all
set maxvar 32767
import delimited using "${path}\weights.csv", clear case(lower) delimiters(",") varnames(1)

//Define CPT parameters
local alpha = 0.88
local lambda = 2.25

//Create time dummy
bysort permno: gen time = _n
sort permno time
//Create VS weights on returns as average between VS weights on prices
gen vsweights_avg = vsweights + vsweights[_n-1] if permno == permno[_n-1]

//Create actual weights (between 0 and 1, summing to 1)
foreach i in vsweights_avg stweights cptweights {
	egen help = total(`i'), by(permno)
	replace `i' = `i'/help
	drop help
}

drop if time == 1 //first price does not have any weight

//Calculate CPT value of daily returns
gen ret_value = ret_daily^`alpha' if ret_daily >= 0
replace ret_value = -`lambda'*((-1*ret_daily)^`alpha') if ret_daily < 0

//Generate linear combinations of VS weights with CPT and ST weights and calculate corresponding Correlation and value
forvalues i = 1/101 {
	local j = `i'-1
	local w = `j'/100
	local v = 1-`w'
	qui gen vscptweights_`j' = `w'*vsweights_avg+`v'*cptweights
	qui gen vsstweights_`j' = `w'*vsweights_avg+`v'*stweights
	egen help = total(vscptweights_`j'), by(permno)
	qui replace vscptweights_`j' = vscptweights_`j'/help
	drop help
	egen help = total(vsstweights_`j'), by(permno)
	qui replace vsstweights_`j' = vsstweights_`j'/help
	drop help
	egen vscptweights_`j'_c = corr(ret_daily vscptweights_`j'), by(permno)
	gen help = vscptweights_`j'*ret_value
	egen vscptweights_`j'_v = total(help), by(permno)
	drop help
	egen vsstweights_`j'_c = corr(ret_daily vsstweights_`j'), by(permno)
	gen help = vsstweights_`j'*ret_value
	egen vsstweights_`j'_v = total(help), by(permno)
	drop help
	qui drop vsstweights_`j'
	di "Weight: `j'"
}

by permno (date), sort: keep if _n == 1
rename permno id
keep id *_c *_v

save "${path}\linear_combinations.dta", replace

use "${path}\fulldataset_study1_update.dta", clear

//Merge datasets
merge m:1 id using "${path}\linear_combinations.dta", update
drop if _merge == 2
drop _merge

sort user_id order

//Standardize variables for easy comparison of coefficients
foreach i of varlist vscptweights_0_c-vsstweights_100_v {
	qui sum `i'
	local mu=r(mean)
	local sd=r(sd)
	qui replace `i' = (`i'-`mu') / `sd'
}


//Setup to store coefficients, standard errors, t-values, R² for different models
gen coeff_vs = .
gen se_vs = .
gen t_vs = .
gen p_vs = .
gen lo_coeff_vs = .
gen hi_coeff_vs = .
gen r2_cpt_c = .
gen r2_cpt_v = .
gen r2_st_c = .
gen r2_st_v = .
gen w = .

//Run models for all VS - CPT and VS - ST combinations, store values
forvalues i = 1/101 {
	local j = `i'-1
	qui replace w = `j' in `i'
	qui xtreg inv vscptweights_`j'_c, robust cluster(user_id) fe 
	qui replace r2_cpt_c = e(r2_w) in `i'
	qui xtreg inv vsstweights_`j'_c, robust cluster(user_id) fe 
	qui replace r2_st_c = e(r2_w) in `i'
	qui xtreg inv vscptweights_`j'_v, robust cluster(user_id) fe 
	qui replace r2_cpt_v = e(r2_w) in `i'
	qui xtreg inv vsstweights_`j'_v, robust cluster(user_id) fe 
	qui replace r2_st_v = e(r2_w) in `i'	
	di "Weight: `j'"
}

//Identify optimal combination (highest R²)
foreach i in cpt_c cpt_v st_c st_v {
	egen help = max(r2_`i')
	di "`i'"
	sum help
	sum w if r2_`i' == help
	local opt_w_`i' = r(mean)
	drop help
}

//Figure 3
twoway line r2_cpt_v w, xtitle("Weight on visual salience (in %)", size(vlarge)) ytitle("R²", size(vlarge)) xlab(0 (20) 100, labs(vlarge)) ylab(0.07 (0.001) 0.074, labs(vlarge)) 
graph export "${path}\Figure3_left.png", width(3000) replace

twoway line r2_st_v w, xtitle("Weight on visual salience (in %)", size(vlarge)) ytitle("R²", size(vlarge)) xlab(0 (20) 100, labs(vlarge)) ylab(0.03 (0.01) 0.08, labs(vlarge)) 
graph export "${path}\Figure3_right.png", width(3000) replace


//Regressions at optimal values for vs/cpt and vs/st combinations, with and without controls
xtreg inv vscptweights_`opt_w_cpt_v'_v, robust cluster(user_id) fe
xtreg inv vsstweights_`opt_w_st_v'_v, robust cluster(user_id) fe
xtreg inv vscptweights_`opt_w_cpt_v'_v return_avg sd skew path*, robust cluster(user_id) fe
xtreg inv vsstweights_`opt_w_st_v'_v return_avg sd skew path*, robust cluster(user_id) fe


//Discussion 2: Beliefs
import delimited using "${path}\weights.csv", clear case(lower) delimiters(",") varnames(1)

//Create time dummy
bysort permno: gen time = _n
sort permno time
//Create VS weights on returns as average between VS weights on prices
gen vsweights_avg = vsweights + vsweights[_n-1] if permno == permno[_n-1]

//Create actual weights (between 0 and 1, summing to 1)
egen help = total(vsweights_avg), by(permno)
replace vsweights_avg = vsweights_avg/help
drop help

//Calculate sum of weighted returns, reflecting linear utility model
gen help = vsweights_avg*ret_daily
egen vs_value_lin  = total(help), by(permno)
drop help

drop if time == 1 //first price does not have any weight

//Calculate return and risk predictions based on gamma-exponated VS weights
local gamma = 0
forvalues i = 0/100 {
	qui gen vs_w = vsweights_avg^`gamma'
	bys permno: egen help = total(vs_w)
	gen help2 = (vs_w*ret_daily)/help
	bys permno: egen vs_return_`i' = total(help2)
	gen help3 = vs_w*((ret_daily - vs_return_`i')^2)/help
	bys permno: egen vs_risk_`i' = total(help3)
	qui drop help help2 help3
	di "Gamma: `i'"
	local gamma = `gamma' + 0.01
	qui drop vs_w
}

by permno (date), sort: keep if _n == 1
rename permno id
keep id vs_return* vs_risk* vs_value_lin

save "${path}\gamma_predictions.dta", replace

use "${path}\fulldataset_study1_update.dta", clear

//Merge gamma data with original dataset
merge m:1 id using "${path}\gamma_predictions.dta", update
drop if _merge == 2
drop _merge

sort user_id order

//Standardize variables for easy comparison of coefficients
foreach i of varlist return risky vs_return_0 - vs_risk_100 {
	qui sum `i'
	local mu=r(mean)
	local sd=r(sd)
	qui replace `i' = (`i'-`mu') / `sd'
}

//Setup to store coefficients, standard errors, t-values, R² for different models
gen coeff_vs = .
gen se_vs = .
gen t_vs = .
gen p_vs = .
gen lo_coeff_vs = .
gen hi_coeff_vs = .
gen r2 = .
gen gamma = _n-1

//Run models predicting return for all gammas, controling for return and convexity deciles; store values
forvalues i = 0/100 {
	local j = `i'+1
	qui xtreg return vs_return_`i' return_decile convexity, robust cluster(user_id) fe
	qui replace r2 = e(r2_w) in `j'
	qui lincom vs_return_`i'
	qui replace coeff_vs = r(estimate) in `j'
	qui replace se_vs = r(se) in `j'
	qui replace t_vs = r(t) in `j'
	qui replace p_vs = r(p) in `j'
	qui replace lo_coeff_vs = r(lb) in `j'
	qui replace hi_coeff_vs = r(ub) in `j'
	di "`i'"
}

//Figure 4 left graph
preserve 
drop if gamma > 100
replace gamma = gamma / 100
twoway line r2 gamma, xtitle("Gamma", size(vlarge)) ytitle("R²", size(vlarge))  xlab(0 (0.2) 1, labs(vlarge)) ylab(0.181 (0.001) 0.184, labs(vlarge)) 
	graph export "${path}\Figure4_left.png", width(3000) replace
restore

//Show gamma with highest R²
egen help = max(r2)
tab gamma if help == r2
drop help

//Run models predicting risk for all gammas, controling for return and convexity deciles; store values
forvalues i = 0/100 {
	local j = `i'+1
	qui xtreg risky vs_risk_`i' return_decile convexity, robust cluster(user_id) fe
	qui replace r2 = e(r2_w) in `j'
	qui lincom vs_risk_`i'
	qui replace coeff_vs = r(estimate) in `j'
	qui replace se_vs = r(se) in `j'
	qui replace t_vs = r(t) in `j'
	qui replace p_vs = r(p) in `j'
	qui replace lo_coeff_vs = r(lb) in `j'
	qui replace hi_coeff_vs = r(ub) in `j'
	di "`i'"
}

//Figure 4 right graph
preserve 
drop if gamma > 100
replace gamma = gamma / 100
twoway line r2 gamma, xtitle("Gamma", size(vlarge)) ytitle("R²", size(vlarge)) xlab(0 (0.2) 1, labs(vlarge)) ylab(0.1776 (0.0003) 0.1786, labs(vlarge)) 
	graph export "${path}\Figure4_right.png", width(3000) replace
restore 

//Show gamma with highest R²
egen help = max(r2)
tab gamma if help == r2
drop help
